; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S -data-layout="E" | FileCheck %s --check-prefixes=ALL,BE
; RUN: opt < %s -passes=instcombine -S -data-layout="e" | FileCheck %s --check-prefixes=ALL,LE


; Opaque external functions. The tests below call them to give a scalar
; (@use) or a vector (@use_vec) value one additional use.
declare void @use(i16)
declare void @use_vec(<8 x i16>)

; The low half of %x is inserted at element 0 and the high half at element 1
; of a poison vector. The little-endian checks expect a single i32 insert into
; a <2 x i32> followed by a bitcast; the big-endian checks expect the two i16
; inserts to remain as written.
define <4 x i16> @insert_01_poison_v4i16(i32 %x) {
; BE-LABEL: @insert_01_poison_v4i16(
; BE-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; BE-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT:    [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT:    [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; BE-NEXT:    ret <4 x i16> [[INS1]]
;
; LE-LABEL: @insert_01_poison_v4i16(
; LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
; LE-NEXT:    [[INS1:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; LE-NEXT:    ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; The low half of %x is inserted at element 1 and the high half at element 0
; of a poison vector. The big-endian checks expect a single i32 insert into a
; <4 x i32> followed by a bitcast; the little-endian checks expect two i16
; inserts, emitted in ascending index order (element 0 first, then element 1).
define <8 x i16> @insert_10_poison_v8i16(i32 %x) {
; BE-LABEL: @insert_10_poison_v8i16(
; BE-NEXT:    [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0
; BE-NEXT:    [[INS1:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x i16>
; BE-NEXT:    ret <8 x i16> [[INS1]]
;
; LE-LABEL: @insert_10_poison_v8i16(
; LE-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; LE-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; LE-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; LE-NEXT:    [[TMP1:%.*]] = insertelement <8 x i16> poison, i16 [[HI16]], i64 0
; LE-NEXT:    [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 1
; LE-NEXT:    ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <8 x i16> poison, i16 %lo16, i64 1
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 0
  ret <8 x i16> %ins1
}

; negative test - larger element is not aligned in the vector
; The two i32 halves of %x go to elements 1 and 2, which straddle two
; i64-sized lanes of the <4 x i32>. The checks for both data layouts expect
; the IR to be left unchanged.

define <4 x i32> @insert_12_poison_v4i32(i64 %x) {
; ALL-LABEL: @insert_12_poison_v4i32(
; ALL-NEXT:    [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT:    [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT:    [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT:    [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 1
; ALL-NEXT:    [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2
; ALL-NEXT:    ret <4 x i32> [[INS1]]
;
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  %lo32 = trunc i64 %x to i32
  %ins0 = insertelement <4 x i32> poison, i32 %lo32, i64 1
  %ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 2
  ret <4 x i32> %ins1
}

; negative test - larger element is not aligned in the vector
; The low half of %x goes to element 2 and the high half to element 1, which
; straddle two i32-sized lanes of the <4 x i16>. The checks for both data
; layouts expect no merge into a wider insert; only the order of the two
; inserts changes (element 1 first, then element 2).

define <4 x i16> @insert_21_poison_v4i16(i32 %x) {
; ALL-LABEL: @insert_21_poison_v4i16(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[HI16]], i64 1
; ALL-NEXT:    [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 2
; ALL-NEXT:    ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 2
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; The low half of %x is inserted at element 2 and the high half at element 3
; of a poison vector. The little-endian checks expect a single i64 insert at
; index 1 of a <2 x i64> followed by a bitcast; the big-endian checks expect
; the two i32 inserts to remain as written.
define <4 x i32> @insert_23_poison_v4i32(i64 %x) {
; BE-LABEL: @insert_23_poison_v4i32(
; BE-NEXT:    [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; BE-NEXT:    [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; BE-NEXT:    [[LO32:%.*]] = trunc i64 [[X]] to i32
; BE-NEXT:    [[INS0:%.*]] = insertelement <4 x i32> poison, i32 [[LO32]], i64 2
; BE-NEXT:    [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3
; BE-NEXT:    ret <4 x i32> [[INS1]]
;
; LE-LABEL: @insert_23_poison_v4i32(
; LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i64> poison, i64 [[X:%.*]], i64 1
; LE-NEXT:    [[INS1:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
; LE-NEXT:    ret <4 x i32> [[INS1]]
;
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  %lo32 = trunc i64 %x to i32
  %ins0 = insertelement <4 x i32> poison, i32 %lo32, i64 2
  %ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 3
  ret <4 x i32> %ins1
}

; The low half of %x is inserted at element 3 and the high half at element 2
; of a poison vector. The big-endian checks expect a single i32 insert at
; index 1 of a <2 x i32> followed by a bitcast; the little-endian checks expect
; two i16 inserts, emitted in ascending index order (element 2, then element 3).
define <4 x i16> @insert_32_poison_v4i16(i32 %x) {
; BE-LABEL: @insert_32_poison_v4i16(
; BE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 1
; BE-NEXT:    [[INS1:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; BE-NEXT:    ret <4 x i16> [[INS1]]
;
; LE-LABEL: @insert_32_poison_v4i16(
; LE-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; LE-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; LE-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; LE-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[HI16]], i64 2
; LE-NEXT:    [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 3
; LE-NEXT:    ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 3
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 2
  ret <4 x i16> %ins1
}

; Similar to the above tests but with a non-poison base vector.

; Vector is same size as scalar, so this is just a cast.
; TODO: Could be swapped/rotated into place.
; Both elements of %v are overwritten (low half of %x at element 0, high half
; at element 1). The little-endian checks expect a plain bitcast of %x to
; <2 x i16>; the big-endian checks expect both inserts to remain, with the
; base vector replaced by poison.

define <2 x i16> @insert_01_v2i16(i32 %x, <2 x i16> %v) {
; BE-LABEL: @insert_01_v2i16(
; BE-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; BE-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT:    [[INS0:%.*]] = insertelement <2 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT:    [[INS1:%.*]] = insertelement <2 x i16> [[INS0]], i16 [[HI16]], i64 1
; BE-NEXT:    ret <2 x i16> [[INS1]]
;
; LE-LABEL: @insert_01_v2i16(
; LE-NEXT:    [[INS1:%.*]] = bitcast i32 [[X:%.*]] to <2 x i16>
; LE-NEXT:    ret <2 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <2 x i16> %v, i16 %lo16, i64 0
  %ins1 = insertelement <2 x i16> %ins0, i16 %hi16, i64 1
  ret <2 x i16> %ins1
}

; negative test - can't do this safely without knowing something about the base vector
; The low half of %x goes to element 1 and the high half to element 0 of the
; argument vector %v. The checks for both data layouts expect no merge into a
; wider insert; only the order of the two inserts changes (element 0 first).

define <8 x i16> @insert_10_v8i16(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_10_v8i16(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT:    [[TMP1:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[HI16]], i64 0
; ALL-NEXT:    [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 1
; ALL-NEXT:    ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 1
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 0
  ret <8 x i16> %ins1
}

; negative test - larger element is not aligned in the vector
; The two i32 halves of %x go to elements 1 and 2 of the argument vector %v,
; straddling two i64-sized lanes. The checks for both data layouts expect the
; IR to be left unchanged.

define <4 x i32> @insert_12_v4i32(i64 %x, <4 x i32> %v) {
; ALL-LABEL: @insert_12_v4i32(
; ALL-NEXT:    [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT:    [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT:    [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT:    [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 1
; ALL-NEXT:    [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 2
; ALL-NEXT:    ret <4 x i32> [[INS1]]
;
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  %lo32 = trunc i64 %x to i32
  %ins0 = insertelement <4 x i32> %v, i32 %lo32, i64 1
  %ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 2
  ret <4 x i32> %ins1
}

; negative test - larger element is not aligned in the vector
; The low half of %x goes to element 2 and the high half to element 1 of the
; argument vector %v, straddling two i32-sized lanes. The checks for both data
; layouts expect no merge into a wider insert; only the order of the two
; inserts changes (element 1 first, then element 2).

define <4 x i16> @insert_21_v4i16(i32 %x, <4 x i16> %v) {
; ALL-LABEL: @insert_21_v4i16(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[HI16]], i64 1
; ALL-NEXT:    [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 2
; ALL-NEXT:    ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> %v, i16 %lo16, i64 2
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; negative test - can't do this safely without knowing something about the base vector
; The low half of %x goes to element 2 and the high half to element 3 of the
; argument vector %v. The checks for both data layouts expect the IR to be
; left unchanged.

define <4 x i32> @insert_23_v4i32(i64 %x, <4 x i32> %v) {
; ALL-LABEL: @insert_23_v4i32(
; ALL-NEXT:    [[HI64:%.*]] = lshr i64 [[X:%.*]], 32
; ALL-NEXT:    [[HI32:%.*]] = trunc i64 [[HI64]] to i32
; ALL-NEXT:    [[LO32:%.*]] = trunc i64 [[X]] to i32
; ALL-NEXT:    [[INS0:%.*]] = insertelement <4 x i32> [[V:%.*]], i32 [[LO32]], i64 2
; ALL-NEXT:    [[INS1:%.*]] = insertelement <4 x i32> [[INS0]], i32 [[HI32]], i64 3
; ALL-NEXT:    ret <4 x i32> [[INS1]]
;
  %hi64 = lshr i64 %x, 32
  %hi32 = trunc i64 %hi64 to i32
  %lo32 = trunc i64 %x to i32
  %ins0 = insertelement <4 x i32> %v, i32 %lo32, i64 2
  %ins1 = insertelement <4 x i32> %ins0, i32 %hi32, i64 3
  ret <4 x i32> %ins1
}

; negative test - can't do this safely without knowing something about the base vector
; The low half of %x goes to element 3 and the high half to element 2 of the
; argument vector %v. The checks for both data layouts expect no merge into a
; wider insert; only the order of the two inserts changes (element 2 first).

define <4 x i16> @insert_32_v4i16(i32 %x, <4 x i16> %v) {
; ALL-LABEL: @insert_32_v4i16(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> [[V:%.*]], i16 [[HI16]], i64 2
; ALL-NEXT:    [[INS1:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[LO16]], i64 3
; ALL-NEXT:    ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> %v, i16 %lo16, i64 3
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 2
  ret <4 x i16> %ins1
}

; negative test - need half-width shift
; The shift amount here is 8 rather than 16 (half of the i32 width), so %hi16
; is not the upper half of %x. The checks for both data layouts expect the IR
; to be left unchanged.

define <4 x i16> @insert_01_v4i16_wrong_shift1(i32 %x) {
; ALL-LABEL: @insert_01_v4i16_wrong_shift1(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 8
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT:    [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; ALL-NEXT:    [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT:    ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 8
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; negative test - need common scalar
; The high half is taken from %x but the low half is truncated from %y, so the
; two inserted elements do not come from one scalar. The checks for both data
; layouts expect the IR to be left unchanged.

define <4 x i16> @insert_01_v4i16_wrong_op(i32 %x, i32 %y) {
; ALL-LABEL: @insert_01_v4i16_wrong_op(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[Y:%.*]] to i16
; ALL-NEXT:    [[INS0:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; ALL-NEXT:    [[INS1:%.*]] = insertelement <4 x i16> [[INS0]], i16 [[HI16]], i64 1
; ALL-NEXT:    ret <4 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %y to i16
  %ins0 = insertelement <4 x i16> poison, i16 %lo16, i64 0
  %ins1 = insertelement <4 x i16> %ins0, i16 %hi16, i64 1
  ret <4 x i16> %ins1
}

; TODO: extra use doesn't have to prevent the fold.
; The low half of %x goes to element 6 and the high half to element 7 of the
; argument vector %v; %hi16 is additionally passed to @use. The checks for
; both data layouts currently expect the IR to be left unchanged.

define <8 x i16> @insert_67_v4i16_uses1(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_67_v4i16_uses1(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    call void @use(i16 [[HI16]])
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT:    [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 6
; ALL-NEXT:    [[INS1:%.*]] = insertelement <8 x i16> [[INS0]], i16 [[HI16]], i64 7
; ALL-NEXT:    ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  call void @use(i16 %hi16)
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 6
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 7
  ret <8 x i16> %ins1
}

; negative test - can't do this safely without knowing something about the base vector
; extra use would be ok
; The low half of %x goes to element 7 and the high half to element 6 of the
; argument vector %v; %lo16 is additionally passed to @use. The checks for
; both data layouts expect no merge into a wider insert; only the order of the
; two inserts changes (element 6 first, then element 7).

define <8 x i16> @insert_76_v4i16_uses2(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_76_v4i16_uses2(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT:    call void @use(i16 [[LO16]])
; ALL-NEXT:    [[TMP1:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[HI16]], i64 6
; ALL-NEXT:    [[INS1:%.*]] = insertelement <8 x i16> [[TMP1]], i16 [[LO16]], i64 7
; ALL-NEXT:    ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  call void @use(i16 %lo16)
  %ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 7
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 6
  ret <8 x i16> %ins1
}

; TODO: extra use doesn't have to prevent the fold.
; The low half of %x goes to element 6 and the high half to element 7 of the
; argument vector %v; the intermediate vector %ins0 is additionally passed to
; @use_vec. The checks for both data layouts currently expect the IR to be
; left unchanged.

define <8 x i16> @insert_67_v4i16_uses3(i32 %x, <8 x i16> %v) {
; ALL-LABEL: @insert_67_v4i16_uses3(
; ALL-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; ALL-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; ALL-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; ALL-NEXT:    [[INS0:%.*]] = insertelement <8 x i16> [[V:%.*]], i16 [[LO16]], i64 6
; ALL-NEXT:    call void @use_vec(<8 x i16> [[INS0]])
; ALL-NEXT:    [[INS1:%.*]] = insertelement <8 x i16> [[INS0]], i16 [[HI16]], i64 7
; ALL-NEXT:    ret <8 x i16> [[INS1]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins0 = insertelement <8 x i16> %v, i16 %lo16, i64 6
  call void @use_vec(<8 x i16> %ins0)
  %ins1 = insertelement <8 x i16> %ins0, i16 %hi16, i64 7
  ret <8 x i16> %ins1
}

; TODO: This is equivalent to the 1st test.
; Same element placement as @insert_01_poison_v4i16 (low half at element 0,
; high half at element 1 of a poison vector), but the high half is inserted
; first. The little-endian checks expect a single i32 insert into a <2 x i32>
; followed by a bitcast; the big-endian checks expect two i16 inserts emitted
; in ascending index order.
; NOTE(review): the expected output here already matches the 1st test for both
; data layouts, so the TODO above looks resolved - confirm before removing it.

define <4 x i16> @insert_01_poison_v4i16_high_first(i32 %x) {
; BE-LABEL: @insert_01_poison_v4i16_high_first(
; BE-NEXT:    [[HI32:%.*]] = lshr i32 [[X:%.*]], 16
; BE-NEXT:    [[HI16:%.*]] = trunc i32 [[HI32]] to i16
; BE-NEXT:    [[LO16:%.*]] = trunc i32 [[X]] to i16
; BE-NEXT:    [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[LO16]], i64 0
; BE-NEXT:    [[INS0:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[HI16]], i64 1
; BE-NEXT:    ret <4 x i16> [[INS0]]
;
; LE-LABEL: @insert_01_poison_v4i16_high_first(
; LE-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0
; LE-NEXT:    [[INS0:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x i16>
; LE-NEXT:    ret <4 x i16> [[INS0]]
;
  %hi32 = lshr i32 %x, 16
  %hi16 = trunc i32 %hi32 to i16
  %lo16 = trunc i32 %x to i16
  %ins1 = insertelement <4 x i16> poison, i16 %hi16, i64 1
  %ins0 = insertelement <4 x i16> %ins1, i16 %lo16, i64 0
  ret <4 x i16> %ins0
}
